import pandas as pd
import numpy as np
import panel as pn
import hvplot.pandas
import matplotlib.pyplot as plt
import holoviews as hv
from holoviews import opts
from scipy import optimize
import warnings
warnings.filterwarnings('ignore')
evidences = pd.read_csv('../output/evidence_aligned.csv')
evidences['lastAA'] = evidences['Sequence'].str[-1:]
evidences_trp = evidences[evidences.lastAA.isin(['K', 'R'])]
# Calculate trend line functions
CCS_fit_charge2 = evidences[evidences['Charge'] == 2]
CCS_fit_charge3 = evidences[evidences['Charge'] == 3]
CCS_fit_charge4 = evidences[evidences['Charge'] == 4]
def trendline_func(x, a, b):
return a * np.power(x, b)
params_charge2, params_covariance_charge2 = optimize.curve_fit(
trendline_func, CCS_fit_charge2['m/z'], CCS_fit_charge2['CCS'])
params_charge3, params_covariance_charge3 = optimize.curve_fit(
trendline_func, CCS_fit_charge3['m/z'], CCS_fit_charge3['CCS'])
params_charge4, params_covariance_charge4 = optimize.curve_fit(
trendline_func, CCS_fit_charge4['m/z'], CCS_fit_charge4['CCS'])
aminoacids = 'A R N D C Q E G H I L K M F P S T W Y V'.split()
# Figure 3
y_label = 'CCS (\u212b\u00B2)'
select_aa = pn.widgets.Select(name='Select an amino acid', options=aminoacids)
@pn.depends(select_aa.param.value)
def calculate_aa_position(aa):
evidences_trp_aa = evidences_trp[evidences_trp.Sequence.str.contains(aa)]
positions = []
for sequence in evidences_trp_aa['Sequence']:
pos = np.array([pos for pos, char in enumerate(sequence) if char == aa])
vector = pos - np.median(range(len(sequence)))
relpos = np.sum(vector) / len(sequence)
positions.append(relpos)
evidences_trp_aa['pos'] = positions
scatter_plot = evidences_trp_aa.hvplot.points(x='m/z', y='CCS', c='pos',
xlabel='m/z', ylabel=y_label, clabel= 'Position',
width=450, height=400, cmap=plt.get_cmap('RdYlBu'),
tools=['hover'], colorbar=True, rasterize=True)
scatter_plot.opts(
toolbar='above', clim=(-1, 1)
)
opts.defaults(opts.Curve(color = "black", line_width=0.5, line_dash='dashed'))
trendline_x = np.arange(300,1800,1)
trendline_charge2 = hv.Curve((trendline_x, trendline_func(trendline_x, params_charge2[0], params_charge2[1])))
trendline_charge3 = hv.Curve((trendline_x, trendline_func(trendline_x, params_charge3[0], params_charge3[1])))
trendline_charge4 = hv.Curve((trendline_x, trendline_func(trendline_x, params_charge4[0], params_charge4[1])))
return (scatter_plot * trendline_charge2 * trendline_charge3 * trendline_charge4)
layout = pn.Column(
pn.pane.Markdown(
"""### Figure 3. A global view on peptide cross sections. c, Subset of peptides with C-terminal arginine or lysine colored by the relative position of amino acids.""",
margin=(10, 0, 20, 0)
),
pn.Row(
pn.layout.VSpacer(width=110),
pn.Column(
select_aa,
pn.pane.Markdown(
"The position of the selected amino acid in the peptide sequence: <br/>from C-term(-1) to N-term(+1)."
)
),
calculate_aa_position
),
align='center',
width=970
)
layout.embed()